From 74777c1274855c5673269f494c418e7aa1378aa4 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Fri, 2 Jun 2017 08:58:08 -0700 Subject: [PATCH] Remove more allocatoins in index querying Removing some allocations arounds the stored hashes by having nested hash maps instead of tuple keys. Also remove an intermediate array when parsing dependencies through a custom implementation of `Deserialize`. While this doesn't make this code path blazingly fast it definitely knocks it down in the profiles below other higher-value targets. --- Cargo.lock | 7 +++ Cargo.toml | 1 + src/cargo/lib.rs | 1 + src/cargo/sources/registry/index.rs | 69 +++++++------------- src/cargo/sources/registry/mod.rs | 97 +++++++++++++++++++++++++++-- src/cargo/util/paths.rs | 15 +++-- 6 files changed, 130 insertions(+), 60 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 02a162478..18b2ab060 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -28,6 +28,7 @@ dependencies = [ "openssl 0.9.13 (registry+https://github.com/rust-lang/crates.io-index)", "psapi-sys 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)", + "scoped-tls 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "semver 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)", "serde_derive 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)", @@ -623,6 +624,11 @@ name = "rustc-serialize" version = "0.3.24" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "scoped-tls" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "semver" version = "0.7.0" @@ -921,6 +927,7 @@ dependencies = [ "checksum regex-syntax 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ad890a5eef7953f55427c50575c680c42841653abd2b028b68cd223d157f62db" "checksum rustc-demangle 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "3058a43ada2c2d0b92b3ae38007a2d0fa5e9db971be260e0171408a4ff471c95" "checksum rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)" = "dcf128d1287d2ea9d80910b5f1120d0b8eede3fbf1abe91c40d39ea7d51e6fda" +"checksum scoped-tls 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f417c22df063e9450888a7561788e9bd46d3bb3c1466435b4eccb903807f147d" "checksum semver 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3fdd61b85a0fa777f7fb7c454b9189b2941b110d1385ce84d7f76efdf1606a85" "checksum semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" "checksum serde 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)" = "c2f530d36fb84ec48fb7146936881f026cdbf4892028835fd9398475f82c1bb4" diff --git a/Cargo.toml b/Cargo.toml index 7a4065cca..894f6e04c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,6 +35,7 @@ libgit2-sys = "0.6" log = "0.3" num_cpus = "1.0" rustc-serialize = "0.3" +scoped-tls = "0.1" semver = { version = "0.7.0", features = ["serde"] } serde = "1.0" serde_derive = "1.0" diff --git a/src/cargo/lib.rs b/src/cargo/lib.rs index 8e723a9a4..2fe2a572b 100755 --- a/src/cargo/lib.rs +++ b/src/cargo/lib.rs @@ -5,6 +5,7 @@ #[cfg(test)] extern crate hamcrest; #[macro_use] extern crate error_chain; #[macro_use] extern crate log; +#[macro_use] extern crate scoped_tls; #[macro_use] extern crate serde_derive; #[macro_use] extern crate serde_json; extern crate crates_io as registry; diff --git a/src/cargo/sources/registry/index.rs b/src/cargo/sources/registry/index.rs index 413502467..14c8ab7d6 100644 --- a/src/cargo/sources/registry/index.rs +++ b/src/cargo/sources/registry/index.rs @@ -3,10 +3,11 @@ use std::path::Path; use std::str; use serde_json; +use semver::Version; -use core::dependency::{Dependency, DependencyInner, Kind}; +use core::dependency::Dependency; use core::{SourceId, Summary, PackageId}; -use sources::registry::{RegistryPackage, RegistryDependency, INDEX_LOCK}; +use sources::registry::{RegistryPackage, INDEX_LOCK}; use sources::registry::RegistryData; use util::{CargoError, CargoResult, internal, Filesystem, Config}; @@ -14,7 +15,7 @@ pub struct RegistryIndex<'cfg> { source_id: SourceId, path: Filesystem, cache: HashMap>, - hashes: HashMap<(String, String), String>, // (name, vers) => cksum + hashes: HashMap>, // (name, vers) => cksum config: &'cfg Config, locked: bool, } @@ -40,13 +41,14 @@ impl<'cfg> RegistryIndex<'cfg> { pkg: &PackageId, load: &mut RegistryData) -> CargoResult { - let key = (pkg.name().to_string(), pkg.version().to_string()); - if let Some(s) = self.hashes.get(&key) { + let name = pkg.name(); + let version = pkg.version(); + if let Some(s) = self.hashes.get(name).and_then(|v| v.get(version)) { return Ok(s.clone()) } // Ok, we're missing the key, so parse the index file to load it. - self.summaries(pkg.name(), load)?; - self.hashes.get(&key).ok_or_else(|| { + self.summaries(name, load)?; + self.hashes.get(name).and_then(|v| v.get(version)).ok_or_else(|| { internal(format!("no hash listed for {}", pkg)) }).map(|s| s.clone()) } @@ -103,6 +105,7 @@ impl<'cfg> RegistryIndex<'cfg> { let contents = str::from_utf8(contents).map_err(|_| { CargoError::from("registry index file was not valid utf-8") })?; + ret.reserve(contents.lines().count()); let lines = contents.lines() .map(|s| s.trim()) .filter(|l| !l.is_empty()); @@ -137,52 +140,22 @@ impl<'cfg> RegistryIndex<'cfg> { -> CargoResult<(Summary, bool)> { let RegistryPackage { name, vers, cksum, deps, features, yanked - } = serde_json::from_str::(line)?; + } = super::DEFAULT_ID.set(&self.source_id, || { + serde_json::from_str::(line) + })?; let pkgid = PackageId::new(&name, &vers, &self.source_id)?; - let deps: CargoResult> = deps.into_iter().map(|dep| { - self.parse_registry_dependency(dep) - }).collect(); - let deps = deps?; - let summary = Summary::new(pkgid, deps, features)?; + let summary = Summary::new(pkgid, deps.inner, features)?; let summary = summary.set_checksum(cksum.clone()); - self.hashes.insert((name, vers), cksum); + if self.hashes.contains_key(&name[..]) { + self.hashes.get_mut(&name[..]).unwrap().insert(vers, cksum); + } else { + self.hashes.entry(name.into_owned()) + .or_insert_with(HashMap::new) + .insert(vers, cksum); + } Ok((summary, yanked.unwrap_or(false))) } - /// Converts an encoded dependency in the registry to a cargo dependency - fn parse_registry_dependency(&self, dep: RegistryDependency) - -> CargoResult { - let RegistryDependency { - name, req, features, optional, default_features, target, kind - } = dep; - - let mut dep = DependencyInner::parse(&name, Some(&req), &self.source_id, None)?; - let kind = match kind.as_ref().map(|s| &s[..]).unwrap_or("") { - "dev" => Kind::Development, - "build" => Kind::Build, - _ => Kind::Normal, - }; - - let platform = match target { - Some(target) => Some(target.parse()?), - None => None, - }; - - // Unfortunately older versions of cargo and/or the registry ended up - // publishing lots of entries where the features array contained the - // empty feature, "", inside. This confuses the resolution process much - // later on and these features aren't actually valid, so filter them all - // out here. - let features = features.into_iter().filter(|s| !s.is_empty()).collect(); - - dep.set_optional(optional) - .set_default_features(default_features) - .set_features(features) - .set_platform(platform) - .set_kind(kind); - Ok(dep.into_dependency()) - } - pub fn query(&mut self, dep: &Dependency, load: &mut RegistryData, diff --git a/src/cargo/sources/registry/mod.rs b/src/cargo/sources/registry/mod.rs index 24b5daf96..84f801ed8 100644 --- a/src/cargo/sources/registry/mod.rs +++ b/src/cargo/sources/registry/mod.rs @@ -160,14 +160,17 @@ use std::borrow::Cow; use std::collections::HashMap; +use std::fmt; use std::fs::File; use std::path::{PathBuf, Path}; use flate2::read::GzDecoder; +use semver::Version; +use serde::de; use tar::Archive; use core::{Source, SourceId, PackageId, Package, Summary, Registry}; -use core::dependency::Dependency; +use core::dependency::{Dependency, DependencyInner, Kind}; use sources::PathSource; use util::{CargoResult, Config, internal, FileLock, Filesystem}; use util::errors::CargoResultExt; @@ -200,14 +203,18 @@ pub struct RegistryConfig { #[derive(Deserialize)] struct RegistryPackage<'a> { - name: String, - vers: String, - deps: Vec>, + name: Cow<'a, str>, + vers: Version, + deps: DependencyList, features: HashMap>, cksum: String, yanked: Option, } +struct DependencyList { + inner: Vec, +} + #[derive(Deserialize)] struct RegistryDependency<'a> { name: Cow<'a, str>, @@ -397,3 +404,85 @@ impl<'cfg> Source for RegistrySource<'cfg> { Ok(pkg.package_id().version().to_string()) } } + +// TODO: this is pretty unfortunate, ideally we'd use `DeserializeSeed` which +// is intended for "deserializing with context" but that means we couldn't +// use `#[derive(Deserialize)]` on `RegistryPackage` unfortunately. +// +// I'm told, however, that https://github.com/serde-rs/serde/pull/909 will solve +// all our problems here. Until that lands this thread local is just a +// workaround in the meantime. +// +// If you're reading this and find this thread local funny, check to see if that +// PR is merged. If it is then let's ditch this thread local! +scoped_thread_local!(static DEFAULT_ID: SourceId); + +impl<'de> de::Deserialize<'de> for DependencyList { + fn deserialize(deserializer: D) -> Result + where D: de::Deserializer<'de>, + { + return deserializer.deserialize_seq(Visitor); + + struct Visitor; + + impl<'de> de::Visitor<'de> for Visitor { + type Value = DependencyList; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + write!(formatter, "a list of dependencies") + } + + fn visit_seq(self, mut seq: A) -> Result + where A: de::SeqAccess<'de>, + { + let mut ret = Vec::new(); + if let Some(size) = seq.size_hint() { + ret.reserve(size); + } + while let Some(element) = seq.next_element::()? { + ret.push(parse_registry_dependency(element).map_err(|e| { + de::Error::custom(e) + })?); + } + + Ok(DependencyList { inner: ret }) + } + } + } +} + +/// Converts an encoded dependency in the registry to a cargo dependency +fn parse_registry_dependency(dep: RegistryDependency) + -> CargoResult { + let RegistryDependency { + name, req, features, optional, default_features, target, kind + } = dep; + + let mut dep = DEFAULT_ID.with(|id| { + DependencyInner::parse(&name, Some(&req), id, None) + })?; + let kind = match kind.as_ref().map(|s| &s[..]).unwrap_or("") { + "dev" => Kind::Development, + "build" => Kind::Build, + _ => Kind::Normal, + }; + + let platform = match target { + Some(target) => Some(target.parse()?), + None => None, + }; + + // Unfortunately older versions of cargo and/or the registry ended up + // publishing lots of entries where the features array contained the + // empty feature, "", inside. This confuses the resolution process much + // later on and these features aren't actually valid, so filter them all + // out here. + let features = features.into_iter().filter(|s| !s.is_empty()).collect(); + + dep.set_optional(optional) + .set_default_features(default_features) + .set_features(features) + .set_platform(platform) + .set_kind(kind); + Ok(dep.into_dependency()) +} diff --git a/src/cargo/util/paths.rs b/src/cargo/util/paths.rs index fecc166a5..988c4b9b4 100644 --- a/src/cargo/util/paths.rs +++ b/src/cargo/util/paths.rs @@ -69,20 +69,19 @@ pub fn without_prefix<'a>(a: &'a Path, b: &'a Path) -> Option<&'a Path> { } pub fn read(path: &Path) -> CargoResult { - (|| -> CargoResult<_> { - let mut ret = String::new(); - let mut f = File::open(path)?; - f.read_to_string(&mut ret)?; - Ok(ret) - })().chain_err(|| { - format!("failed to read `{}`", path.display()) - }) + match String::from_utf8(read_bytes(path)?) { + Ok(s) => Ok(s), + Err(_) => bail!("path at `{}` was not valid utf-8", path.display()), + } } pub fn read_bytes(path: &Path) -> CargoResult> { (|| -> CargoResult<_> { let mut ret = Vec::new(); let mut f = File::open(path)?; + if let Ok(m) = f.metadata() { + ret.reserve(m.len() as usize + 1); + } f.read_to_end(&mut ret)?; Ok(ret) })().chain_err(|| { -- 2.30.2